home *** CD-ROM | disk | FTP | other *** search
- # $Id: PerlSAX.pm,v 1.6 2000/02/28 10:40:21 matt Exp $
-
- package XML::XPath::PerlSAX;
- use XML::XPath::XMLParser;
- use strict;
-
- sub new {
- my $class = shift;
- my %args = @_;
- bless \%args, $class;
- }
-
- sub parse {
- my $self = shift;
-
- die "XML::XPath::PerlSAX: parser instance ($self) already parsing\n"
- if (defined $self->{ParseOptions});
-
- # If there's one arg and it's an array ref, assume it's a node we're parsing
- my $args;
- if (@_ == 1 && ref($_[0]) =~ /^(text|comment|element|namespace|attribute|pi)$/) {
- # warn "Parsing node\n";
- my $node = shift;
- # warn "PARSING: $node ", XML::XPath::XMLParser::as_string($node), "\n\n";
- $args = { Source => { Node => $node } };
- }
- else {
- $args = (@_ == 1) ? shift : { @_ };
- }
-
- my $parse_options = { %$self, %$args };
- $self->{ParseOptions} = $parse_options;
-
- # ensure that we have at least one source
- if (!defined $parse_options->{Source} ||
- !defined $parse_options->{Source}{Node}) {
- die "XML::XPath::PerlSAX: no source defined for parse\n";
- }
-
- # assign default Handler to any undefined handlers
- if (defined $parse_options->{Handler}) {
- $parse_options->{DocumentHandler} = $parse_options->{Handler}
- if (!defined $parse_options->{DocumentHandler});
- }
-
- # ensure that we have a DocumentHandler
- if (!defined $parse_options->{DocumentHandler}) {
- die "XML::XPath::PerlSAX: no Handler or DocumentHandler defined for parse\n";
- }
-
- # cache DocumentHandler in self for callbacks
- $self->{DocumentHandler} = $parse_options->{DocumentHandler};
-
- if ((ref($parse_options->{Source}{Node}) eq 'element') &&
- !($parse_options->{Source}{Node}->[node_parent])) {
- # Got root node
- $self->{DocumentHandler}->start_document( { } );
- $self->parse_node($parse_options->{Source}{Node});
- return $self->{DocumentHandler}->end_document( { } );
- }
- else {
- $self->parse_node($parse_options->{Source}{Node});
- }
-
- # clean up parser instance
- delete $self->{ParseOptions};
- delete $self->{DocumentHandler};
-
- }
-
- sub parse_node {
- my $self = shift;
- my $node = shift;
- # warn "parse_node $node\n";
- if (ref($node) eq 'element' && $node->[node_parent]) {
- # bundle up attributes
- my @attribs;
- foreach my $attr (@{$node->[node_attribs]}) {
- if ($attr->[node_prefix]) {
- push @attribs, $attr->[node_prefix] . ":" . $attr->[node_key];
- }
- else {
- push @attribs, $attr->[node_key];
- }
- push @attribs, $attr->[node_value];
- }
-
- $self->{DocumentHandler}->start_element(
- { Name => $node->[node_name],
- Attributes => \@attribs,
- }
- );
- foreach my $kid (@{$node->[node_children]}) {
- $self->parse_node($kid);
- }
- $self->{DocumentHandler}->end_element(
- {
- Name => $node->[node_name],
- }
- );
- }
- elsif (ref($node) eq 'text') {
- $self->{DocumentHandler}->characters($node->[node_text]);
- }
- elsif (ref($node) eq 'comment') {
- $self->{DocumentHandler}->comment($node->[node_comment]);
- }
- elsif (ref($node) eq 'pi') {
- $self->{DocumentHandler}->processing_instruction(
- {
- Target => $node->[node_target],
- Data => $node->[node_data]
- }
- );
- }
- elsif (ref($node) eq 'element') { # root node
- # just do kids
- foreach my $kid (@{$node->[node_children]}) {
- $self->parse_node($kid);
- }
- }
- else {
- die "Unknown node type: '", ref($node), "' ", scalar(@$node), "\n";
- }
- }
-
- 1;
-
- __END__
-
- =head1 NAME
-
- XML::XPath::PerlSAX - A PerlSAX event generator for my wierd node structure
-
- =head1 SYNOPSIS
-
- use XML::XPath;
- use XML::XPath::PerlSAX;
- use XML::DOM::PerlSAX;
-
- my $xp = XML::XPath->new(filename => 'test.xhtml');
- my $paras = $xp->find('/html/body/p');
-
- my $handler = XML::DOM::PerlSAX->new();
- my $generator = XML::XPath::PerlSAX->new( Handler => $handler );
-
- foreach my $node ($paras->get_nodelist) {
- my $domtree = $generator->parse($node);
- # do something with $domtree
- }
-
- =head1 DESCRIPTION
-
- This module generates PerlSAX events to pass to a PerlSAX handler such
- as XML::DOM::PerlSAX. It operates specifically on my wierd tree format.
-
- Unfortunately SAX doesn't seem to cope with namespaces, so these are
- lost completely. I believe SAX2 is doing namespaces.
-
- =head1 Other
-
- The XML::DOM::PerlSAX handler I tried was completely broken (didn't even
- compile before I patched it a bit), so I don't know how correct this
- is or how far it will work.
-
- This software may only be distributed as part of the XML::XPath package.
-